#' ---
#' title: "Whose Dimension is it anyway: Graphs and Tables"
#' author: "Lukas F. Stoetzer"
#' ---

#' Script estimates irt models for Senator Representation Study

  # Libraries 
    library(tidyverse)
    library(reshape2)
    library(extrafont)
    
  # Utiltiy Functions
    source("utilits.R")


## 0) Load Results ==============
  
  # Prepare Item Names
    cces_08_items <- data.frame(
      study="CCES08",item=1:9,
      short=c("TrpsIrq", "MnmWg", "StmCll",
              "FISA","Schip","GaMrrg",
              "Hsng", "NAFTA", "Bank")) 
    
    cces_10_items <- data.frame(
      study="CCES10",item=1:9,
      short=c("RcvryRnvst", "Schip", "ClnEnrgy",
              "HlthRfrm","FinRef","EdAdT",
              "FISA", "StmCll", "Bank")) 
    
    cces_12_items <- data.frame(
      study="CCES12",item=c(1:9),
      short=c("RyanBud", "SimpBoyles", 
              "TaxCut",
              "BrthCntrl","TrdKorea","RplHlth",
              "Pplne", "Hlth", "EdAdT")) 
    
    # Prepare Item Names
    srs_items <- data.frame(
      study="SRS",item=1:27, 
      item_type = c("Hard item", "Item" , "Hard item", "Hard item","Item",
               "Easy item","Item","Item","Item","Hard item","Hard item","Easy item",
               "Hard item","Item","Item","Item",
               "Easy item","Easy item","Easy item","Easy item",
               "Item","Item","Easy item","Item","Easy item",
               "Hard item","Hard item"),
      short = c("BsnStr","NclWr","CETA","Bnk","GntanmBy","FmlsTx","ChlsCr","RdsFrst","ChldSf",
                "StpPrvt Fdrl Jbs","MltaBs","MnmWg","PrtcAms","CgaTx","MrcEm","FmlPla","RseInTx2","RseInTx",
                "UnbrnVctms","HtCrm","FlEcn",
                "GsRdc","BaTrtMltry","Armr","NatWld","PyRfl","FrnAct"),
      desc = c("Jumpstart Our Business Strength Act","Remove Funding for Bunker Buster Nuclear Warhead",
               "Central American Free Trade Agreement","Bankruptcy Abuse Prevention and Consumer Protection Act",
               "Remove Funding for Guantanamo Bay Detention Center",
               "Working Families Tax Relief Act",  "Child Care Funding for Welfare Recipients",
               "Prohibiting Roads in Tongass National Forest", "Child Safety Locks Amendment",
               "Stopping Privatization of Federal Jobs", "Military Base Closure Delays",
               "Minimum Wage Increase", "Protection of Lawful Commerce in Arms Act",
               "Cigarette Tax Increase", "Disapproval of Mercury Emissions Rule",
               "Family Planning Aid Policy (Mexico City Policy)", "Raise Tax Rate on Income over One Million Dollars",
               "Raise Tax Rate on Highest Income Bracket" , "Unborn Victims of Violence Act",
               "Federal Hate Crimes","Fuel Economy Standards",
               "Greenhouse Gas Reduction and Credit Trading System", "Banning Torture by U.S. Military Interrogators",
               "Broaden Definition of Armor Piercing Ammunition", "Prohibit Drilling in Arctic National Wildlife Refuge",
               "Overtime Pay Regulations",   "Class Action Fairness Act")
    )  
    
    # Cbmn 
    items <- bind_rows(cces_08_items,
                       cces_10_items,
                       cces_12_items,
                       srs_items)


## 1) Estimated Discrimination Paramters CCES ============
    
    # Load Results
    load("res_cces08_scaling.Rdata")
    load("res_cces10_scaling.Rdata")
    load("res_cces12_scaling.Rdata")
    load("res_srs_scaling.Rdata")
    
    
    # Bind Gamma Estimates in List
    rls <- list(leg_08,  resp_08,
                leg_10,  resp_10,
                leg_12,  resp_12,
                leg_srs, resp_srs
    )
    
    rm(leg_08,  resp_08,
       leg_10,  resp_10,
       leg_12,  resp_12,
       leg_srs, resp_srs)
    
    # Function to get alpha and beta estimates
    df_item <- do.call("rbind",lapply(1:length(rls),function(i) data.frame("item"=1:dim(rls[[i]]$beta)[2],
                                                                           "beta"=apply((rls[[i]]$beta[,,1]),2,median),
                                                                           "alpha"=apply((rls[[i]]$beta[,,2]),2,median),
                                                                           "case"=i)))
    
    # Create Data.frame
    df_item$case <- as.character(factor(df_item$case, 
                                        labels = c("CCES08_leg","CCES08_resp",#"CCES08_respind","CCES08_resppid",
                                                   "CCES10_leg","CCES10_resp",#"CCES10_respind","CCES10_resppid",
                                                   "CCES12_leg","CCES12_resp",
                                                   "SRS_leg","SRS_resp"
                                        )))
    # Respahe
    df_item <- tidyr::separate(df_item,"case",c("study","type")) %>%
      gather(cat,val,alpha,beta) %>%
      spread(type,val) 
    
    # Add Item Inoramtion
    df_items_par <- left_join(df_item,items, by=c("study","item"))
    

    # Add Description for some items 
    df_items_par <- mutate(df_items_par, 
                           sel_desc = as.factor(case_when(
                             study == "CCES08" & item %in% c(1) ~ "dark",
                             study == "CCES10" & item %in% c(4) ~ "dark",
                             study == "CCES12" & item %in% c(3) ~ "dark",
                             study == "SRS" & item %in% c(12,10) ~ "dark",
                             TRUE ~ "light")))
    # Make a plot
    lim <- 1.1*max(abs(filter(df_items_par,cat=="beta")$leg))
    df_items_beta <- filter(df_items_par,cat=="beta")  
    
    
    # Plot 
    ggplot() +
      geom_point(data=df_items_beta,aes(x=leg,y=resp, col= sel_desc)) +
      geom_text(data=filter(df_items_beta, sel_desc == "dark"), 
                aes(x=leg,y=resp, label=short),
                nudge_x = -.5, nudge_y = -1.5) +
      theme_minimal() + 
      scale_color_grey(start = 0, end = 0.8) +
      facet_wrap(~ study, ncol = 2) +
      ylim(c(-lim,lim)) + xlim(c(-lim,lim)) +
      geom_abline(a=0,b=1,col="grey",alpha=0.4) + 
      geom_hline(yintercept = 0,col="grey",alpha=0.4) + 
      geom_vline(xintercept =0,col="grey",alpha=0.4)  +
      theme(#panel.grid.major = element_blank(), 
            panel.grid.minor = element_blank(),
            panel.background = element_rect("grey98",linetype = "blank"), 
            legend.position = "None")  +
      ylab("Respondent Discrimination") + xlab("Senator Discrimination")

    # Save Plot
    ggsave("fig_alpha_scatter.pdf", width = 6,height = 6,dpi=300)         

   # Additional Information  
   df_items_beta %>%
     mutate(ratio = leg/resp) %>%
     group_by(study) %>%
     summarise(correl = cor(leg,resp),
               reg_int = coef(lm(resp~leg))[1],
               reg_line = coef(lm(resp~leg))[2])

    
## 1) Estimated Error Variances CCES ============
  
    # Load Results from GIRT
    load("cces2008_girt_models.Rdata")
    load("cces2010_girt_models.Rdata")
    load("cces2012_girt_models.Rdata")
    load("SRS_irt_bayes_m2.Rdata")
    load("SRS_irt_bayes_m4.Rdata")
    

## 1.1) Error Variance for Partisians =========
    
  # Bind Gamma Estimates in List
    rsl <- list(res_2008_m2$gamma,
                res_2010_m2$gamma,
                res_2012_m2$gamma,
                res_SRS2_m2$gamma
                )
    
  # Function to calulcate Error Variance given 
    sigma_var <- function(gamma, x){
      exp(x%*%gamma)
    }

  # Define Scenarios for Plot
   x.sce <- cbind(1,c(0,0,1,1),c(0,1,0,1),c(0,0,0,1))

   rbind_array <- function(aray) do.call("rbind",lapply(seq(dim(aray)[3]), function(x) aray[ , , x]))
   
  # Calculate resulting CI Intervals from Posterori draws
    df_error <- data.frame(do.call("rbind",
            lapply(1:4 ,function(i) 
              t(apply(t(apply(rbind_array(rsl[[i]]),1,sigma_var,x=x.sce)),2,quantile,c(0.025,0.05,0.5,0.95,0.975))))
            ))
  
  # Define Data.frame
    names(df_error) <- c("low","low2","mid","high2","high")
    df_error$type <- c("Ind., low exp.", "Ind., high exp.",
                       "Part., low exp.","Part., high exp.") 
    df_error$study <- sort(rep(c("CCES08","CCES10","CCES12","SRS"),4))
    
    df_leg <- data.frame("low"=1,"low2"=1,"mid"=1,"high2"=1,"high"=1,
                         "type"="Leg.",
                         "study"=c("CCES08","CCES10","CCES12","SRS"))
    
    df_error <- bind_rows(df_error,df_leg)
    df_error$type <- factor(df_error$type, levels = c("Leg.",
                                                      "Part., high exp.",
                                                      "Part., low exp.",
                                                      "Ind., high exp.",
                                                      "Ind., low exp."))
  # Make a plot
    ggplot(df_error) +
      geom_point(aes(x=type,y=mid), size=3)   +
      geom_linerange(aes(x=type,ymin=low2,ymax=high2), size=1.5)   +
      geom_linerange(aes(x=type,ymin=low,ymax=high), size=1.1)   +
      facet_wrap(~study, scales="free") +
      coord_flip() + xlab("")+ ylab("Error Variance")+
      theme_minimal() +
      theme(#panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            panel.background = element_rect("grey98",linetype = "blank"))

  # Save Plot
    ggsave("fig_error_partisian.pdf", width = 6,height = 6,dpi=300)
  
## 1.2) Error Variance Comparsion =========
    

    
    # Function to calulcate Error Variance given 
    sigma_var_diff <- function(gamma, x1, x2){
      exp(x1%*%gamma) - exp(x2%*%gamma)
    }
    
    # Define Scenarios for Plot
    x.sce <- data.frame(
                  cbind(1,c(0,0,1,1),c(0,1,0,1),c(0,0,0,1)),
                  type=c("Ind.,low exp.",
                         "Ind.,high exp.",
                         "Part.,low exp.",
                         "Part.,high exp."))
    
    all_cmb <- expand.grid("type1"= x.sce$type, "type2"= x.sce$type)
    
    all_cmb <- merge(merge(all_cmb,x.sce,by.x = "type1",by.y="type"),
                      x.sce,by.x="type2",by.y="type")

   
    rbind_array <- function(aray) do.call("rbind",lapply(seq(dim(aray)[3]), function(x) aray[ , , x]))
    
    # Calculate resulting CI Intervals from Posterori draws
    df_error_diff <- data.frame(do.call("rbind",
                                   lapply(1:4 ,function(i) 
                                       t(apply(t(apply(rbind_array(rsl[[i]]),1,FUN = function(dat) sigma_var_diff(dat,
                                                     x1=as.matrix(all_cmb[,3:6]),x2=as.matrix(all_cmb[,7:10]))))
                                             ,2,quantile,c(0.025,0.05,0.5,0.95,0.975))))
    ))
    
    # Define Data.frame
    df_error_diff$type1 <- all_cmb$type1
    df_error_diff$type2 <- all_cmb$type2
    df_error_diff$study <- sort(rep(c("CCES08","CCES10","CCES12","SRS"),nrow(all_cmb)))
    
    names(df_error_diff)[1:5] <- c("low2","low","mid","high","high2")
    
      # Make a plot
    ggplot(filter(df_error_diff, type1 != type2, type1 %in% c("Part.,high exp."))) +
      geom_point(aes(x=type2,y=mid, shape=study, group=study),position = position_dodge(0.5), size=3)   +
      geom_linerange(aes(x=type2,ymin=low,ymax=high, group=study),position = position_dodge(0.5) , size=1.5)   +
      geom_linerange(aes(x=type2,ymin=low2,ymax=high2, group=study),position = position_dodge(0.5) ,size=1.1)   +
      geom_hline(yintercept =0,col="red",alpha=0.2) +
      facet_wrap(~type1, scales="free") +
      coord_flip() + xlab("")+ ylab("Difference in Error Variance")+
      theme_minimal() +
      theme(#panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_rect("grey98",linetype = "blank"))
    
    # Save Plot
    ggsave("fig_error_partisian_diff.pdf", width = 6,height = 3,dpi=300)
    
    
## 2) Estimated Item Response Curve ============
    
    # Prepare Data
    
    # Specific item response curve function
    irc <- function(alpha,beta,sigma,theta){
      pnorm((alpha + beta * theta)/(sigma))
    }
    
    # Prepare Data
    rls <- list(res_2008_m2,res_2010_m2, res_2012_m2, res_SRS2_m2)
    df_item <- do.call("rbind",lapply(1:length(rls),function(i) data.frame("item"=1:dim(rls[[i]]$beta)[2],
                                                                           "beta"=apply(rbind_array(rls[[i]]$beta),2,median),
                                                                           "alpha"=apply(rbind_array(rls[[i]]$alpha),2,median),
                                                                           "study"=i)))
    df_item$study <- factor(df_item$study, labels = c("CCES08","CCES10","CCES12","SRS"))
    
    
    df_item <- left_join(df_item,items, by=c("study","item"))
    
    
    df_error$study <- as.factor(df_error$study)
    df_par <- left_join(df_item,dplyr::select(df_error, type, study, "sigma"= mid))
    df_full <- crossing(df_par, data.frame("theta"=seq(from = -3, to=3,by=0.1)))
    
    
    # Specific item response curve function
    irc <- function(alpha,beta,sigma,theta){
      pnorm((alpha + beta * theta)/(sigma))
    }
    
    df_full <- mutate(df_full, prob = irc(alpha,beta,sigma,theta))
    
    
    # Make Plot for all CCES Studies Separatly
    study_sel  <- c("CCES08")
    
    # Filter
    filter(df_full,
           study==study_sel,
           type %in% c("Leg.",
                       "Part., high exp.",
                       "Ind., low exp.")) %>%
      # Plot
      ggplot() +
      geom_line(aes(x=theta,y=prob, linetype=type)) +
      facet_wrap(~ (short))  +
      xlab("Position") + ylab("Probability") +
      theme_minimal() +
      theme(panel.grid.major = element_blank(), 
            panel.background = element_rect("grey97",linetype = "blank"),
            legend.title=element_blank(),
            legend.position = "top")
    
    # Save 
    ggsave(file="fig_irc_cces08_m2.pdf",width=6,height =6,dpi=300)    

## 1.2) Error Variance fr Partisians =========
    
  # Bind Gamma Estimates in List
    rsl <- list(res_2008_m3$gamma,res_2010_m3$gamma,res_2012_m3$gamma,res_SRS2_m4$gamma
    )
    
    # Define Scenarios for Plot
    x.sce <- cbind(1,c(0,0,1,1,0,0),
                     c(0,0,0,0,1,1),
                     c(0,1,0,1,0,1),
                     c(0,0,0,1,0,0),
                     c(0,0,0,0,0,1)
                   )
    
    rbind_array <- function(aray) do.call("rbind",lapply(seq(dim(aray)[3]), function(x) aray[ , , x]))
    
    # Calculate resulting CI Intervals from Posterori draws
    df_error <- data.frame(do.call("rbind",
                                   lapply(1:4 ,function(i) 
                                     t(apply(t(apply(rbind_array(rsl[[i]]),1,sigma_var,x=x.sce)),2,quantile,c(0.025,0.05,0.5,0.95,0.975))))
    ))
    
    # Define Data.frame
    names(df_error) <- c("low","low2","mid","high2","high")
    df_error$type <- c("Ind., low exp.", "Ind., high exp.",
                       "Dem., low exp.","Dem., high exp.",
                       "Rep., low exp.","Rep., high exp."
                       ) 
    df_error$study <- sort(rep(c("CCES 08","CCES 10","CCES 12","SRS"),6))
    
    df_leg <- data.frame("low"=1,"low2"=1,"mid"=1,"high2"=1,"high"=1,
                         "type"="Leg.",
                         "study"=c("CCES 08","CCES 10","CCES 12","SRS"))
    
    df_error <- bind_rows(df_error,df_leg)
    df_error$type <- factor(df_error$type, levels = c("Leg.",
                                                      "Dem., high exp.",
                                                      "Rep., high exp.",
                                                      "Ind., high exp.",
                                                      "Dem., low exp.",
                                                      "Rep., low exp.",
                                                      "Ind., low exp."))
    # Make a plot
    ggplot(df_error) +
      geom_point(aes(x=type,y=mid), size=3)   +
      geom_linerange(aes(x=type,ymin=low2,ymax=high2), size=1.5)   +
      geom_linerange(aes(x=type,ymin=low,ymax=high), size=1.1)   +
      facet_wrap(~study, scales="free") +
      coord_flip() + xlab("")+ ylab("Error Variance")+
      theme_minimal() +
      theme(panel.grid.major = element_blank(),
            panel.background = element_rect("grey97",linetype = "blank"))
    
    # Save Plot
    ggsave("fig_error_demRep.pdf", width = 12,height = 5,dpi=300)
    
    
